****************************************************************************
****************************************************************************
***********RESEARCH PROJECT: OFFSHORING AND IMMIGRATION*********************
****************************************************************************
***************************AUTHOR :*****************************************
*****************************Dario P.****************************
****************************************************************************



* ---------------------------------------------------------------------------
* Session setup: timestamps, clean slate, Stata settings, folder shortcuts
* and working directory.
* ---------------------------------------------------------------------------

* Date and time at which the run started (stored in locals; not referenced
* again in the visible part of this file).
local stdate $S_DATE
local sttime $S_TIME

* Empty the data in memory and close any log that is still open.
* capture keeps the script going when no log is open.
clear
capture log close

* Stata settings.
set more off
set mem 5g
set matsize 800

* Folder shortcuts. They are relative paths, resolved against the working
* directory selected by the cd command below.
global data "DARIO\DATA\"
global prgrms "DO\"
global logs "LOG\"
global tables "TABLE\"
global graph "GRAPH\"

* Project working directory.
cd "G:\data\workdata\707562"


* ---------------------------------------------------------------------------
* Load the import data and attach firm-level registers by cvrnr and year.
* ---------------------------------------------------------------------------
use ${data}import_ind_e2_will.dta, clear

* Convert the firm identifier to numeric before using it as a merge key.
destring cvrnr, replace

* --- accounting / industry data ---
* Master-only rows (_merge==1) are discarded; rows that exist only in the
* accounting file (_merge==2) are kept.
sort cvrnr year
merge cvrnr year using ${data}industry.dta
keep if _merge==2 | _merge==3
drop _merge

* Restrict the sample window to 1995-2011.
keep if inrange(year, 1995, 2011)

* --- foreign ownership: matched rows only ---
sort cvrnr year
merge cvrnr year using ${data}foreign_ownership.dta
keep if _merge==3
drop _merge

* foreign = 1 when the ownership code GF_VIRKFKOD is one of the listed codes.
destring GF_VIRKFKOD, replace
gen foreign=inlist(GF_VIRKFKOD, 160, 170, 180, 190, 196, 200, 210, 220, 290, 291)

* --- antalarb: matched rows only ---
sort cvrnr year
merge cvrnr year using ${data}antalarb.dta
keep if _merge==3
drop _merge

* multi = 1 when antalarb exceeds one.
* A missing antalarb also evaluates as greater than one in Stata.
gen multi=(antalarb>1)


* ---------------------------------------------------------------------------
* Firm-level controls, step 1: yearly deflator with base year 2000 (= 1).
* The values below are listed in year order, starting in 1993 and ending in
* 2012. Years outside that range keep the initial value 0.
* ---------------------------------------------------------------------------
gen deflator=0
local yr=1993
foreach d in 0.86 0.87 0.89 0.91 0.93 0.95 0.97 1 1.02 1.05 1.07 1.08 1.10 1.12 1.14 1.18 1.2 1.22 1.26 1.29 {
    replace deflator=`d' if year==`yr'
    local ++yr
}



* ---------------------------------------------------------------------------
* Industry codes.
* Builds a numeric 2007 industry code (bra07), fills gaps within firm, and
* derives a 2-digit string (branche2) and a 1-digit sector group (bra).
* ---------------------------------------------------------------------------
destring GF_BRANCHE_07, gen(bra07)
format bra07 %06.0f

* Fill missing 2007 codes with the largest 2007 code observed for the same
* firm and the same GF_BRANCHE_03 value. -999 is a temporary placeholder for
* missing; it is turned back into missing when no code could be borrowed.
replace bra07=-999 if bra07==.
bysort GF_BRANCHE_03 cvrnr: egen bra07_max=max(bra07)
replace bra07=bra07_max if bra07==-999
replace bra07=. if bra07==-999


gen branche=bra07

* First two characters of the code as a string. A missing code becomes ".".
* NOTE(review): branche does not carry the %06.0f display format of bra07,
* so tostring writes the number without leading zeros. If codes can start
* with 0, the first two characters would come from a shorter string and
* would not be the true 2-digit division. Confirm against the data.
tostring branche, gen(branche1)
gen branche2=substr(branche1, 1,2)

* 1-digit industry classification (2007 nomenclature).
gen bra=.

* manufacturing
replace bra=1 if branche2>="10" & branche2<="33"
* electricity, water
* Fixed: the lower bound used == instead of >=, so only division 35 was
* assigned and divisions 36-39 stayed unclassified.
replace bra=2 if branche2>="35" & branche2<="39"
* construction
replace bra=3 if branche2>="41" & branche2<="43"
* wholesale and retail trade
replace bra=4 if branche2>="45" & branche2<="47"
* transport
replace bra=5 if branche2>="49" & branche2<="56"
* information and communication
replace bra=6 if branche2>="58" & branche2<="63"
* financial services, real estate and business services
replace bra=7 if branche2>="64" & branche2<="75"
* other services
replace bra=8 if branche2>="77" & branche2<="97"


* Sales are taken from GF_OMS.
gen sales=GF_OMS

* Remove rows with negative sales or negative GF_EGUL.
* Missing values are not negative, so they are kept here.
drop if sales<0 | GF_EGUL<0

* Express the monetary variables in base-year prices.
foreach v of varlist sales GF_EGUL GF_E_GUL {
    replace `v'=`v'/deflator
}



* ---------------------------------------------------------------------------
* Firm size. The firm_size2 dataset was created by globid from:
* G:\Data\Workdata\703989\STATA FILES\idas
* Only firm-years found in both files are kept.
* ---------------------------------------------------------------------------
sort cvrnr year
merge cvrnr year using ${data}firm_size2.dta
keep if _merge==3
drop _merge

* Total employment comes from fansh.
gen tot_emp=fansh

* Size classes: 1-19, 20-49, and 50 or more employees.
* A missing tot_emp evaluates as 50 or more in Stata, so it gets size3 = 1.
gen size1=(tot_emp>=1 & tot_emp<20)
gen size2=(tot_emp>=20 & tot_emp<50)
gen size3=(tot_emp>=50)

* Log sales per employee.
gen productivity=ln(sales/tot_emp)

* Log capital per employee. The capital variable is GF_EGUL from 1999
* onwards and GF_E_GUL before 1999.
gen capital_int=ln(cond(year>=1999, GF_EGUL, GF_E_GUL)/tot_emp)




* ---------------------------------------------------------------------------
* Workforce characteristics retrieved from the matched employer-employee
* database. Only firm-years found in both files are kept.
* ---------------------------------------------------------------------------
sort cvrnr year
merge cvrnr year using ${data}demographic.dta
keep if _merge==3
drop _merge

* ---------------------------------------------------------------------------
* Extensive and intensive margins of offshoring, for the broad and the
* narrow measure:
*   ext_X_off = 1 when the offshoring value is positive
*   int_X_off = log of the offshoring value (missing when the value is 0)
* A missing offshoring value is first set to 0.
* ---------------------------------------------------------------------------
foreach m in broad narrow {
    replace `m'_off_dis=0 if `m'_off_dis==.
    gen ext_`m'_off=(`m'_off_dis>0)
    gen int_`m'_off=ln(`m'_off_dis)
}


* ---------------------------------------------------------------------------
* Sample restrictions. A row is removed when any of these holds:
*   - no industry code (branche2 is ".")
*   - year before 1995 or equal to 2012
*   - fewer than 2 employees
*   - productivity or capital intensity is missing
*   - sector group 2 (electricity, water)
* ---------------------------------------------------------------------------
drop if branche2=="." | year<1995 | year==2012 | tot_emp<2 | productivity==. | capital_int==. | bra==2

* Firms that change akom (needed for clustering).
* akom_m is the largest akom value observed for the firm.
* NOTE(review): only the rows whose akom differs from that maximum are
* removed; the remaining rows of a firm that moved stay in the sample.
* Confirm that this is the intended rule.
bysort cvrnr: egen akom_m=max(akom)
gen change=(akom!=akom_m)
drop if change==1

* String version of the country variable, taken from its value labels.
decode land, gen(lands)

* north = 1 for the listed country codes. The list is split over two inlist
* calls because inlist accepts only a limited number of string arguments.
gen north=( inlist(lands, "AT", "BE", "FI", "FR", "DE", "GR", "IS", "IE", "IT") | ///
            inlist(lands, "LI", "NL", "NO", "PT", "ES", "SE", "GB", "CH") )

* Drop the EU 15 destinations flagged above.
drop if north==1

* Keep a single row per firm, country and year.
bysort cvrnr land year: keep if _n==1

* Identifier for each country-firm pair.
egen id=group(land cvrnr)
format id %19.9g

* From here on, land is the string country code.
drop land
rename lands land

* ---------------------------------------------------------------------------
* Attach iv_off_dis by akom, country and year. Rows that exist only in the
* using file are discarded; unmatched master rows are kept.
* ---------------------------------------------------------------------------
sort akom land year
merge akom land year using ${data}iv_off_dis.dta
drop if _merge==2
drop _merge

* Missing values in the merged variables are set to 0.
foreach v of varlist s_hat s_hat_ling s_hat_geo share_for2 share_for2_mun share_for3_mun share_for4_mun {
    replace `v'=0 if `v'==.
}

* Rename so that the name s_hat is free for a later merge.
rename s_hat s_hat2


* ---------------------------------------------------------------------------
* Municipality-level share of non-EU foreigners, by akom and year.
* Matched rows only.
* ---------------------------------------------------------------------------
sort akom year
merge akom year using ${data}for_non_eu.dta
keep if _merge==3
drop _merge

* Again keep one row per firm, country and year. The variable id is reused
* as a within-group counter here and equals 1 on every remaining row.
drop id
bysort cvrnr land year: gen id=_n
keep if id==1

* Municipality population, by akom and year. Matched rows only.
sort akom year
merge akom year using ${data}pop_kom.dta
keep if _merge==3
drop _merge

* Convert the two shares to head counts, subtract the bilateral count from
* the non-EU count, and convert the difference back to a share.
gen num_foreigner_mun=foreigner_nonEU_mun*pop
gen num_share_for2=share_for2_mun*pop
gen num_foreigner_mun2=num_foreigner_mun-num_share_for2

gen foreigner_mun2=num_foreigner_mun2/pop
replace foreigner_mun2=0 if foreigner_mun2==.


* ---------------------------------------------------------------------------
* pop_88 (by akom) and iv_off5 (by akom and year). Matched rows only.
* ---------------------------------------------------------------------------
sort akom
merge akom using ${data}pop_88.dta
keep if _merge==3
drop _merge

sort akom year
merge akom year using ${data}iv_off5.dta
keep if _merge==3
drop _merge

* s_hat here is the variable present after the iv_off5 merge; the earlier
* s_hat was renamed to s_hat2.
replace s_hat=0 if s_hat==.

* Predicted head counts: both predicted shares are multiplied by P_m88, the
* bilateral count is subtracted from the s_hat count, and the difference is
* divided by P_m88 again.
gen num_foreigner_pred_bil=s_hat2*P_m88
gen num_foreigner_pred_nonEU=(s_hat*P_m88)-num_foreigner_pred_bil

gen foreigner_pred_nonEU=num_foreigner_pred_nonEU/P_m88
replace foreigner_pred_nonEU=0 if foreigner_pred_nonEU==.

* ---------------------------------------------------------------------------
* Declare the panel (country-firm pair by year) and build one-year lags.
* ---------------------------------------------------------------------------
drop id
egen id=group(land cvrnr)
format id %19.9g
drop if id==.
tsset id year

* The foreign-ownership dummy is replaced by its own one-year lag.
gen foreign_o=L.foreign
drop foreign
rename foreign_o foreign

* One-year lags. Each new variable is the lag of the source variable in
* the same position of the second list:
*   prod     productivity
*   cap      capital_int
*   for2     share_for2_mun         bilateral share at the mun level
*   for      foreigner_mun2         multilateral share at the mun level
*   for3     share_for3_mun         bilateral share, low skilled
*   for4     share_for4_mun         bilateral share, high skilled
*   for_iv2  s_hat2                 corresponding instruments from here on
*   for_iv   foreigner_pred_nonEU
*   for_iv3  s_hat_ling
*   for_iv4  s_hat_geo
local lagged  "prod cap for2 for for3 for4 for_iv2 for_iv for_iv3 for_iv4"
local sources "productivity capital_int share_for2_mun foreigner_mun2 share_for3_mun share_for4_mun s_hat2 foreigner_pred_nonEU s_hat_ling s_hat_geo"
forvalues i=1/10 {
    local new : word `i' of `lagged'
    local src : word `i' of `sources'
    gen `new'=L.`src'
}


* ---------------------------------------------------------------------------
* Attach for_firm_bil by firm, country and year. No rows are discarded
* after this merge, so rows present only in the using file stay in the data.
* ---------------------------------------------------------------------------
sort cvrnr land year
merge cvrnr land year using ${data}for_firm_bil.dta
drop _merge
replace for_firm_bil=0 if for_firm_bil==.

* Rebuild the pair identifier, since the merge may have added rows.
drop id
egen id=group(land cvrnr)
format id %19.9g
drop if missing(id)

* tsset needs a unique pair-year combination: keep the first row of each.
bysort id year: gen id3=_n
keep if id3==1
tsset id year

* One-year lag of for_firm_bil.
gen for_firm=L.for_firm_bil

* ---------------------------------------------------------------------------
* meany: adds the estimation-sample mean of a variable to a stored model.
*   argument 1: variable to summarize over e(sample)
*   argument 2: name of the stored estimates that receive the scalar Meany
* Must be called while the relevant estimation results are active, because
* it reads e(sample).
* ---------------------------------------------------------------------------
capture program drop meany
program define meany, rclass
    args depvar estname
    summarize `depvar' if e(sample), meanonly
    estadd scalar Meany=r(mean): `estname'
end



* ---------------------------------------------------------------------------
* Save the assembled dataset, reload it, and reduce it to the variables
* needed for estimation.
* NOTE(review): this absolute path points to project folder 703989, while
* the working directory set at the top of the file is 707562. Confirm that
* this is the intended location.
* ---------------------------------------------------------------------------
save "G:\Data\Workdata\703989\DARIO\DATA\will_bil_new.dta", replace


use "G:\Data\Workdata\703989\DARIO\DATA\will_bil_new.dta", clear


* Firm-level control variables used in every regression below.
global cntrl2 "prod cap years_of_edu  male age  work_exp tenure  foreign multi size1 size2 size3"



keep id ext_narrow_off int_narrow_off for for_firm for2 for3 for4  for_iv for_iv2 for_iv3 for_iv4  num_share_for2 num_foreigner_pred_bil  cvrnr year $cntrl2 akom land bra branche2 



* Numeric versions of the 2-digit industry code and of the country code,
* so that both can be used as absorbed fixed effects.
destring branche2, replace
encode land, gen(land2)
drop land
rename land2 land


* Identifier for each municipality-country pair.
* Fixed: the original used the product akom*land, which is not unique per
* pair because different pairs can share a product (165*4 and 330*2 are
* both 660). Colliding pairs would have shared one fixed effect.
* egen group() numbers each distinct pair separately and, like the product,
* is missing when akom or land is missing.
egen akoml=group(akom land)
drop if year==1994




* ---------------------------------------------------------------------------
* Table 8, extensive margin: four IV regressions of ext_narrow_off on the
* controls in cntrl2, with fixed effects for year, industry, firm and
* municipality-country pair, clustered by akom.
*   m1: for2 instrumented by for_iv2
*   m2: for3 and for4 instrumented by for_iv3 and for_iv4
*   m3: for_firm instrumented by for_iv2
*   m4: for2 and for instrumented by for_iv2 and for_iv
* After each model: store it, add the sample mean of the outcome (Meany),
* and add e(idstat) and e(idp) as the scalars F and p.
* NOTE(review): e(idstat) is reported under the label "F-stat". In
* ivreg2-style results idstat is the underidentification statistic, not a
* first-stage F. Confirm the label.
* ---------------------------------------------------------------------------
local dv ext_narrow_off

local iv1 "(for2=for_iv2)"
local iv2 "(for3 for4= for_iv3 for_iv4)"
local iv3 "(for_firm=for_iv2)"
local iv4 "(for2 for= for_iv2 for_iv)"

* The absorb order is kept exactly as it was written for each model.
local fe1 "year branche2 akoml cvrnr"
local fe2 "year branche2 cvrnr  akoml"
local fe3 "year branche2 cvrnr  akoml"
local fe4 "year branche2  akoml cvrnr"

forvalues k=1/4 {
    xi: reghdfe `dv' $cntrl2 `iv`k'', absorb(`fe`k'') cluster(akom) ffirst stages(first)
    estimates store m`k'
    meany `dv' m`k'
    estadd scalar F= e(idstat), replace
    estadd scalar p= e(idp), replace
}

esttab m1  m2 m3 m4  using ${tables}Table8_extensive.txt, replace style(tab) ///
    cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
    stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
        labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    keep( for2 for_firm for3 for4  for)


* ---------------------------------------------------------------------------
* Table 8, intensive margin: the same four IV specifications with
* int_narrow_off as the outcome. The stored names m1-m4 are reused and
* overwritten.
*   m1: for2 instrumented by for_iv2
*   m2: for3 and for4 instrumented by for_iv3 and for_iv4
*   m3: for_firm instrumented by for_iv2
*   m4: for2 and for instrumented by for_iv and for_iv2
* NOTE(review): e(idstat) is reported under the label "F-stat". In
* ivreg2-style results idstat is the underidentification statistic, not a
* first-stage F. Confirm the label.
* ---------------------------------------------------------------------------
local dv int_narrow_off
local fe "year branche2 cvrnr  akoml"

local iv1 "(for2=for_iv2)"
local iv2 "(for3 for4= for_iv3 for_iv4)"
local iv3 "(for_firm=for_iv2)"
local iv4 "(for2 for=  for_iv for_iv2)"

forvalues k=1/4 {
    xi: reghdfe `dv' $cntrl2 `iv`k'', absorb(`fe') cluster(akom) ffirst stages(first)
    estimates store m`k'
    meany `dv' m`k'
    estadd scalar F= e(idstat), replace
    estadd scalar p= e(idp), replace
}

esttab m1  m2 m3 m4  using ${tables}Table8_intensive.txt, replace style(tab) ///
    cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
    stats(Meany F p r2 N, fmt(%9.3f  %9.3f  %9.3f %9.3f  %9.0fc) ///
        labels("Mean Y" "F-stat" "p-value"  "R-sq" N)) ///
    starlevels(* 0.10 ** 0.05 *** 0.01) ///
    keep( for2 for_firm for3 for4  for)

